# coding=utf-8
import os
import tqdm

# Collect the unique tokens found in the aishell_* transcript files and write
# them, one per line, to am_tokens.txt.
#
# Each non-blank input line must contain at least two tab-separated fields;
# only the second field is used, and it is split on whitespace into tokens.
root = './data/'
filePath = ["aishell_train.txt","aishell_test.txt","aishell_dev.txt"]

pin_list = []
for file in filePath:
    with open(root + file, "r", encoding="utf8") as f1:
        # Stream the file line by line instead of loading it all at once.
        for line_no, item in enumerate(f1, start=1):
            # Drop the line terminator so it cannot end up glued to the last
            # token (which would create bogus "token\n" vocabulary entries).
            item = item.rstrip("\r\n")
            if not item.strip():
                # Tolerate blank lines (e.g. a trailing empty line).
                continue
            fields = item.split("\t")
            if len(fields) < 2:
                raise ValueError(
                    "%s%s line %d: expected a tab-separated token field"
                    % (root, file, line_no)
                )
            # split() with no argument also discards the empty strings that
            # repeated spaces would otherwise produce.
            pin_list.extend(fields[1].split())
print(len(pin_list))
# Sort after de-duplicating so the output order is reproducible between runs
# (plain set iteration order varies with string hash randomization).
pin_list = sorted(set(pin_list))
print(len(pin_list))
outputFile = "./am_tokens.txt"
# Open the output once rather than once per token. Append mode is kept from
# the original script, so re-running adds to an existing file.
# NOTE(review): switch to 'w' if the file is meant to be regenerated from scratch.
with open(outputFile, 'a', encoding="utf8") as f2:
    for pins in pin_list:
        print(pins)
        f2.write(pins + "\n")
print("---结束----")
